Source of data and the following data description: https://archive.ics.uci.edu/ml/datasets/GPS+Trajectories.
Abstract: The dataset was fed by an Android app called Go!Track, which is available on the Google Play Store.
Source of data and the following data description: https://archive.ics.uci.edu/ml/datasets/GPS+Trajectories.
Data Set Information:
The dataset is composed of two tables. The first table, go_track_tracks, holds general attributes; each of its instances has one trajectory, which is represented in the table go_track_trackspoints.
Attribute Information:
##Load the data
# Read the per-track summary table and the per-point GPS table as plain data
# frames; text columns are kept as character rather than converted to factors.
dfTracks <- read.csv(
  file = "./GPS_Trajectory/go_track_tracks.csv",
  stringsAsFactors = FALSE
)
dfPoints <- read.csv(
  file = "./GPS_Trajectory/go_track_trackspoints.csv",
  stringsAsFactors = FALSE
)
Check for NA values and richness of ‘tracks’.
# Render the first six track records as a table
kable(head(dfTracks, n = 6L))
| id | id_android | speed | time | distance | rating | rating_bus | rating_weather | car_or_bus | linha |
|---|---|---|---|---|---|---|---|---|---|
| 1 | 0 | 19.210586 | 0.1380489 | 2.652 | 3 | 0 | 0 | 1 | |
| 2 | 0 | 30.848229 | 0.1714847 | 5.290 | 3 | 0 | 0 | 1 | |
| 3 | 1 | 13.560101 | 0.0676986 | 0.918 | 3 | 0 | 0 | 2 | |
| 4 | 1 | 19.766679 | 0.3895444 | 7.700 | 3 | 0 | 0 | 2 | |
| 8 | 0 | 25.807401 | 0.1548006 | 3.995 | 2 | 0 | 0 | 1 | |
| 10 | 2 | 1.346913 | 0.0066819 | 0.009 | 2 | 0 | 0 | 1 | |
# Render the first six GPS point records as a table
kable(head(dfPoints, n = 6L))
| id | latitude | longitude | track_id | time |
|---|---|---|---|---|
| 1 | -10.93934 | -37.06274 | 1 | 2014-09-13 07:24:32 |
| 2 | -10.93934 | -37.06274 | 1 | 2014-09-13 07:24:37 |
| 3 | -10.93932 | -37.06276 | 1 | 2014-09-13 07:24:42 |
| 4 | -10.93921 | -37.06284 | 1 | 2014-09-13 07:24:47 |
| 5 | -10.93894 | -37.06288 | 1 | 2014-09-13 07:24:53 |
| 6 | -10.93854 | -37.06284 | 1 | 2014-09-13 07:24:59 |
# Rename 'id' to 'index' in dfPoints to alleviate confusion; for this file the
# identifier we want is 'track_id', so it takes over the name 'id'.
# Both data frames are then ordered by that id so their ids can be compared.
dfPoints <- dfPoints %>%
  rename(index = id, id = track_id) %>%
  arrange(id)
dfTracks <- arrange(dfTracks, id)
# Positions at which the distinct ids of the two tables disagree (diagnosis)
which(unique(dfTracks$id) != unique(dfPoints$id))
## integer(0)
# Number of positions at which the distinct ids of the two tables disagree
sum(unique(dfTracks$id) != unique(dfPoints$id))
## [1] 0
# unique(dfTracks$id)
# unique(dfPoints$id)
# summary(dfTracks)
# summary(dfPoints)
With the respective IDs aligned between the two data frames, join the two together.
# Join based on 'id': each GPS point picks up the attributes of its track,
# then only the columns used further down are kept, under shorter names.
df <- dfPoints %>%
  left_join(dfTracks, by = "id") %>%
  select(
    id,
    user = id_android,
    lat = latitude,
    lon = longitude,
    date_time = time.x
  ) %>%
  mutate(
    # Fix the time variable: parse the timestamp text into a date-time
    date_time = ymd_hms(date_time),
    # Fix user from 0-27 to 1-28 and make it categorical (factor)
    user = as.factor(user + 1)
  )
# The two source tables are no longer needed once joined
rm(dfPoints, dfTracks)
dim(df)
## [1] 18107 5
# Render the first six rows of the joined data as a table
kable(head(df, n = 6L))
| id | user | lat | lon | date_time |
|---|---|---|---|---|
| 1 | 1 | -10.93934 | -37.06274 | 2014-09-13 07:24:32 |
| 1 | 1 | -10.93934 | -37.06274 | 2014-09-13 07:24:37 |
| 1 | 1 | -10.93932 | -37.06276 | 2014-09-13 07:24:42 |
| 1 | 1 | -10.93921 | -37.06284 | 2014-09-13 07:24:47 |
| 1 | 1 | -10.93894 | -37.06288 | 2014-09-13 07:24:53 |
| 1 | 1 | -10.93854 | -37.06284 | 2014-09-13 07:24:59 |
# Smallest and largest longitude over all points
range(df[["lon"]])
## [1] -48.63292 -36.49336
# Smallest and largest latitude over all points
range(df[["lat"]])
## [1] -27.60317 -10.29284
# Build one HTML hover label per row of `df` (user, date and time of the fix).
# The text is assembled column-wise in a single vectorised sprintf() call
# rather than by indexing the data frame once per row: that is much cheaper for
# a table of this size, and an empty `df` gives an empty list instead of the
# bogus indices c(1, 0) that seq(nrow(df)) would generate.
# `labs` remains a list with one element per row, as the map code expects.
labs <- as.list(sprintf(
  "<p><b>USER: </b>%s</p><p><b>DATE: </b>%s</p><p><b>TIME: </b>%s</p>",
  as.character(df$user),
  format(df$date_time, format = "%d %b %y"),
  format(df$date_time, format = "%H:%M")
))

# Interactive overview map: every point drawn as a small red circle, with the
# view centred on the mean coordinate of all points.
leafMap <- df %>%
  leaflet() %>%
  addTiles() %>%
  setView(
    lat = mean(df$lat, na.rm = TRUE),
    lng = mean(df$lon, na.rm = TRUE),
    zoom = 6
  ) %>%
  addCircleMarkers(
    lng = ~lon, lat = ~lat,
    color = "red", opacity = 1, radius = 0.2,
    # mark each label as HTML so the tags are rendered rather than shown
    label = lapply(labs, htmltools::HTML)
  )
leafMap
The next map colors each point by user.
# Map of all points coloured by user, with a layers control to show or hide
# individual users.
user_id <- unique(df$user)
# Qualitative Brewer palettes only come in a limited number of colours, and
# brewer.pal() warns when asked for fewer than 3 or more than the palette
# offers, falling back to the nearest available size. Clamp the request to
# that range up front; colorFactor() then spreads the colours over all users.
user_pal <- colorFactor(
  brewer.pal(
    n = min(max(length(user_id), 3), brewer.pal.info["Dark2", "maxcolors"]),
    name = "Dark2"
  ),
  user_id
) # Accent, Dark2, Paired, Pastel1, Pastel2, Set1, Set2, Set3.
leafMap2 <- df %>%
  leaflet() %>%
  addTiles() %>%
  setView(
    lat = mean(df$lat, na.rm = TRUE),
    lng = mean(df$lon, na.rm = TRUE),
    zoom = 12
  ) %>%
  addCircleMarkers(
    lng = ~lon, lat = ~lat,
    color = ~user_pal(user), opacity = 1, radius = 0.15,
    label = lapply(labs, htmltools::HTML),
    # put every marker in its user's group; without this the layers control
    # below lists groups that contain no layers and its checkboxes do nothing
    group = ~as.character(user)
  ) %>%
  # addPolylines(lat = ~lat, lng = ~lon) %>%
  # group names are the user labels as text (c() on a factor may not give them)
  addLayersControl(overlayGroups = as.character(user_id))
leafMap2
# Find the calendar dates on which more than one user recorded points.
# Each timestamp is reduced to its date with as_date(); this avoids formatting
# to text and re-parsing it, which depended on the locale's month abbreviations
# and on guessing the century of a two-digit year.
# The column keeps the name `date_time` although it now holds dates.
dfDup <- data.frame(user = df$user, date_time = as_date(df$date_time))
# Keep one row per (user, date) pair
dfDup <- dfDup[!duplicated(dfDup), ]
# Which dates: any date that still occurs again belongs to a further user
dfDup$date_time[duplicated(dfDup$date_time)]
## [1] "2014-10-08" "2014-11-28" "2015-02-23" "2015-03-02" "2015-04-24"
## [6] "2015-05-19" "2015-05-19" "2015-05-20" "2015-05-22" "2015-05-28"
## [11] "2015-05-28" "2015-05-29" "2015-05-29" "2015-06-03" "2015-06-03"
# Subset to the points recorded on 29 May 2015, ordered by user and time, and
# replace the numeric user labels seen that day with names.
dfplot <- df %>%
  filter(format(date_time, format = "%m-%d-%Y") == "05-29-2015") %>%
  arrange(user, date_time) %>%
  mutate(
    user = plyr::revalue(
      user,
      c("2" = "Alison", "11" = "Rodd", "13" = "Gregory")
    ),
    # rebuild the factor from its text so that only labels still present in
    # the subset remain as levels
    user = as.factor(as.character(user))
  )
# Build one HTML hover label per row of `dfplot` (user, date and time).
# Assembled column-wise in one vectorised sprintf() call instead of indexing
# the data frame once per row; an empty `dfplot` gives an empty list rather
# than the bogus indices c(1, 0) that seq(nrow(dfplot)) would generate.
labs <- as.list(sprintf(
  "<p><b>USER: </b>%s</p><p><b>DATE: </b>%s</p><p><b>TIME: </b>%s</p>",
  as.character(dfplot$user),
  format(dfplot$date_time, format = "%d %b %y"),
  format(dfplot$date_time, format = "%H:%M")
))
user_id <- unique(dfplot$user)
user_pal <- colorFactor(
  brewer.pal(n = length(user_id), name = "Set1"),
  user_id
) # Accent, Dark2, Paired, Pastel1, Pastel2, Set1, Set2, Set3.

# Interactive map of the selected day, coloured by user, with a layers control
# to show or hide individual users.
leafMap3 <- dfplot %>%
  leaflet() %>%
  addTiles() %>%
  setView(
    lat = mean(dfplot$lat, na.rm = TRUE),
    lng = mean(dfplot$lon, na.rm = TRUE),
    zoom = 12
  ) %>%
  addCircleMarkers(
    lng = ~lon, lat = ~lat,
    color = ~user_pal(user), opacity = 0.8, radius = 0.1,
    label = lapply(labs, htmltools::HTML),
    # put every marker in its user's group; without this the layers control
    # below lists groups that contain no layers and its checkboxes do nothing
    group = ~as.character(user)
  ) %>%
  # group names are the user labels as text (c() on a factor may not give them)
  addLayersControl(overlayGroups = as.character(user_id))
leafMap3
library(ggplot2)
library(ggmap)
## Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
## Please cite ggmap if you use it! See citation("ggmap") for details.
# Smallest and largest longitude among the points selected for plotting
range(dfplot[["lon"]])
## [1] -37.10016 -36.99810
# NOTE(review): this takes the latitude range of the full `df`, while the
# longitude range just above is taken from `dfplot`. If the aim is to size the
# map to the plotted subset, range(dfplot$lat) is presumably what was meant.
# Left unchanged so the recorded output below stays valid; confirm and re-knit.
range(df$lat)
## [1] -27.60317 -10.29284
# Fetch the background map for a bounding box around the plotted tracks; the
# four numbers are passed in the order left, bottom, right, top.
# Area reference:
# https://www.openstreetmap.org/export#map=13/-10.9408/-37.0376&layers=C
# "hybrid" is likely a googlemap type.
base <- get_map(
  maptype = "hybrid",
  location = c(-37.1223, -11.0007, -36.9529, -10.8808)
)
## Source : http://tile.stamen.com/terrain/13/3251/4345.png
## Source : http://tile.stamen.com/terrain/13/3252/4345.png
## Source : http://tile.stamen.com/terrain/13/3253/4345.png
## Source : http://tile.stamen.com/terrain/13/3254/4345.png
## Source : http://tile.stamen.com/terrain/13/3255/4345.png
## Source : http://tile.stamen.com/terrain/13/3251/4346.png
## Source : http://tile.stamen.com/terrain/13/3252/4346.png
## Source : http://tile.stamen.com/terrain/13/3253/4346.png
## Source : http://tile.stamen.com/terrain/13/3254/4346.png
## Source : http://tile.stamen.com/terrain/13/3255/4346.png
## Source : http://tile.stamen.com/terrain/13/3251/4347.png
## Source : http://tile.stamen.com/terrain/13/3252/4347.png
## Source : http://tile.stamen.com/terrain/13/3253/4347.png
## Source : http://tile.stamen.com/terrain/13/3254/4347.png
## Source : http://tile.stamen.com/terrain/13/3255/4347.png
# Static map of the three users' tracks on 29 May 2015: one small point per
# GPS fix plus a path joining each user's fixes, drawn over the base map.
map3 <- ggmap(base) +
  geom_point(
    data = dfplot, aes(x = lon, y = lat, color = user),
    shape = 20, size = 0.25 # `size` is ggplot2's own name for base R's `cex`
  ) +
  geom_path(data = dfplot, aes(x = lon, y = lat, color = user)) +
  # x carries longitude and y latitude, so the axis titles must follow suit
  # (they were previously swapped)
  labs(x = "Longitude", y = "Latitude", title = "3 User Tracks - 29 May 2015") +
  scale_color_manual(values = c("blue", "red", "purple")) +
  theme_bw()
map3
## Warning: Removed 44 rows containing missing values (geom_point).
## Warning: Removed 44 row(s) containing missing values (geom_path).